
source('funs/rifle-fun.R')
#### zone #### 
# Zone-level panel built from the officer/repression CSV.
# Stage 1: one row per (zone, year, zone_officer) with summed `repression`.
zone_df <- read.csv('data/repression_officers.csv', header = TRUE) %>%
  select(
    'zone_officer', 'zone_officer_rank', 'zone_officer_branch',
    'repression', 'zone', 'year', 'departament', 'dept_code', 'prov'
  ) %>%
  group_by(zone, year, zone_officer) %>%
  summarise(
    repression = sum(repression, na.rm = TRUE),
    branch = paste(zone_officer_branch, collapse = ';'),
    rank = paste(zone_officer_rank, collapse = ';')
  ) %>%
  # Keep only the text before the first ';' of the pasted strings.
  mutate(
    branch = sub(";.*", "", branch),
    rank = sub(";.*", "", rank)
  ) %>%
  na.omit()

# Stage 2: one row per (zone, year); officer names are comma-joined,
# `n_lead` is the number of distinct officers in that zone-year, and
# `repression` is averaged over the officer-level rows.
# Stage 3: standardise `repression` within each zone
# (subtract the zone mean, divide by the zone sd).
zone_df <- zone_df %>%
  distinct() %>%
  group_by(zone, zone_officer) %>%
  # n_year_lead is not carried through the summarise() below.
  mutate(n_year_lead = n()) %>%
  group_by(zone, year) %>%
  mutate(n_lead = n_distinct(zone_officer)) %>%
  summarise(
    leader_name = paste(zone_officer, collapse = ","),
    n_lead = mean(n_lead, na.rm = TRUE),
    repression = mean(repression, na.rm = TRUE),
    branch = paste(branch, collapse = ';'),
    rank = paste(rank, collapse = ';')
  ) %>%
  mutate(
    branch = sub(";.*", "", branch),
    rank = sub(";.*", "", rank)
  ) %>%
  na.omit() %>%
  group_by(zone) %>%
  mutate(
    repression_change =
      (repression - mean(repression, na.rm = TRUE)) /
        sd(repression, na.rm = TRUE)
  )

# The first column (the grouping key `zone`) becomes the generic `unit`.
names(zone_df)[1] <- 'unit'

#### subzone #### 

# Subzone-level panel built from the officer/repression CSV.
# Stage 1: one row per (subzone, year, subzone_officer) with summed
# `repression`.
sz_df <- read.csv('data/repression_officers.csv', header = TRUE) %>%
  select(
    'subzone_officer', 'subzone_officer_rank', 'subzone_officer_branch',
    'repression', 'subzone', 'year', 'departament', 'dept_code', 'prov'
  ) %>%
  group_by(subzone, year, subzone_officer) %>%
  summarise(
    repression = sum(repression, na.rm = TRUE),
    branch = paste(subzone_officer_branch, collapse = ';'),
    rank = paste(subzone_officer_rank, collapse = ';')
  ) %>%
  # Keep only the text before the first ';' of the pasted strings.
  mutate(
    branch = sub(";.*", "", branch),
    rank = sub(";.*", "", rank)
  ) %>%
  na.omit()

# Stage 2: one row per (subzone, year); officer names are comma-joined,
# `n_lead` is the number of distinct officers in that subzone-year, and
# `repression` is averaged over the officer-level rows.
# Stage 3: standardise `repression` within each subzone
# (subtract the subzone mean, divide by the subzone sd).
sz_df <- sz_df %>%
  distinct() %>%
  group_by(subzone, subzone_officer) %>%
  # n_year_lead is not carried through the summarise() below.
  mutate(n_year_lead = n()) %>%
  group_by(subzone, year) %>%
  mutate(n_lead = n_distinct(subzone_officer)) %>%
  summarise(
    leader_name = paste(subzone_officer, collapse = ","),
    n_lead = mean(n_lead, na.rm = TRUE),
    repression = mean(repression, na.rm = TRUE),
    branch = paste(branch, collapse = ';'),
    rank = paste(rank, collapse = ';')
  ) %>%
  mutate(
    branch = sub(";.*", "", branch),
    rank = sub(";.*", "", rank)
  ) %>%
  na.omit() %>%
  group_by(subzone) %>%
  mutate(
    repression_change =
      (repression - mean(repression, na.rm = TRUE)) /
        sd(repression, na.rm = TRUE)
  )

# The first column (the grouping key `subzone`) becomes the generic `unit`.
names(sz_df)[1] <- 'unit'


#### area #### 
# Area-level panel built from the officer/repression CSV.
# Stage 1: one row per (area, year, area_officer) with summed `repression`.
area_df <- read.csv('data/repression_officers.csv', header = TRUE) %>%
  select(
    'area_officer', 'area_officer_rank', 'area_officer_branch',
    'repression', 'area', 'year', 'departament', 'dept_code', 'prov'
  ) %>%
  group_by(area, year, area_officer) %>%
  summarise(
    repression = sum(repression, na.rm = TRUE),
    branch = paste(area_officer_branch, collapse = ';'),
    rank = paste(area_officer_rank, collapse = ';')
  ) %>%
  # Keep only the text before the first ';' of the pasted strings.
  mutate(
    branch = sub(";.*", "", branch),
    rank = sub(";.*", "", rank)
  ) %>%
  na.omit()

# Stage 2: one row per (area, year); officer names are comma-joined,
# `n_lead` is the number of distinct officers in that area-year, and
# `repression` is averaged over the officer-level rows.
area_df <- area_df %>%
  distinct() %>%
  group_by(area, area_officer) %>%
  # n_year_lead is not carried through the summarise() below.
  mutate(n_year_lead = n()) %>%
  group_by(area, year) %>%
  mutate(n_lead = n_distinct(area_officer)) %>%
  summarise(
    leader_name = paste(area_officer, collapse = ","),
    n_lead = mean(n_lead, na.rm = TRUE),
    repression = mean(repression, na.rm = TRUE),
    branch = paste(branch, collapse = ';'),
    rank = paste(rank, collapse = ';')
  ) %>%
  mutate(
    branch = sub(";.*", "", branch),
    rank = sub(";.*", "", rank)
  ) %>%
  na.omit()

# The first column (the grouping key `area`) becomes the generic `unit`
# before the within-unit standardisation below.
names(area_df)[1] <- 'unit'

# Stage 3: standardise `repression` within each unit
# (subtract the unit mean, divide by the unit sd).
area_df <- area_df %>%
  group_by(unit) %>%
  mutate(
    repression_change =
      (repression - mean(repression, na.rm = TRUE)) /
        sd(repression, na.rm = TRUE)
  )

#### area 2 #### 
# Rows for the `area_officer_2` columns of the officer/repression CSV,
# reduced to a count of rows per (area, area_officer_2) pair.
area2_df <- read.csv('data/repression_officers.csv', header = TRUE) %>%
  select(
    'area_officer_2', 'area_officer_2_rank', 'area_officer_2_branch',
    'repression', 'area', 'year', 'departament', 'dept_code', 'prov'
  ) %>%
  group_by(area, year, area_officer_2) %>%
  summarise(
    repression = sum(repression, na.rm = TRUE),
    branch = paste(area_officer_2_branch, collapse = ';'),
    rank = paste(area_officer_2_rank, collapse = ';')
  ) %>%
  # Keep only the text before the first ';' of the pasted strings.
  mutate(
    branch = sub(";.*", "", branch),
    rank = sub(";.*", "", rank)
  ) %>%
  na.omit() %>%
  # Rows with an empty-string second officer are discarded.
  filter(area_officer_2 != "") %>%
  group_by(area, year) %>%
  # n_lead is not carried through the summarise() below.
  mutate(n_lead = n_distinct(area_officer_2)) %>%
  group_by(area, area_officer_2) %>%
  # Number of remaining area-year rows for each (area, area_officer_2).
  summarise(second_leader_area = n())

#colnames(area2_df)[1] = 'unit'
#area2_df$area_officer_2 = NULL

# The first column (`area`) becomes `unit`, the key used for merging.
names(area2_df)[1] <- 'unit'

# Stack the area, subzone and zone panels into one unit-year table.
officer_unit_year <- rbind.data.frame(area_df, sz_df, zone_df)

# Left-merge the second-leader counts on `unit`, then keep only the rows
# that found no match, i.e. units absent from area2_df.
officer_unit_year <- merge(officer_unit_year, area2_df,
                           by = 'unit', all.x = TRUE)
officer_unit_year <- officer_unit_year %>%
  filter(is.na(second_leader_area))

# Keep units whose mean `n_lead` over all their rows is exactly 1.
officer_unit_year_sub <- officer_unit_year %>%
  group_by(unit) %>%
  mutate(n_lead_mean = mean(n_lead)) %>%
  filter(n_lead_mean == 1)

# Missing standardised values are set to 0 before modelling.
missing_change <- is.na(officer_unit_year_sub$repression_change)
officer_unit_year_sub$repression_change[missing_change] <- 0

# Year fixed-effects model; its residuals are stored in `resid`.
m <- lm(repression_change ~ factor(year), data = officer_unit_year_sub)
officer_unit_year_sub$resid <- m$residuals

# NOTE(review): an earlier version of this section first built
# `officer_unit_year_sub_test` by dropping leaders that appear in only one
# row (group_by(leader_name), n() == 1), fitted a year model on it, and
# then overwrote the object with the statement below, which starts again
# from `officer_unit_year_sub`. A leader model fitted before that step was
# overwritten in the same way. Those statements never affected any later
# result and have been removed. If the single-row-leader filter was meant
# to apply, chain it here - TODO confirm with the author, since that would
# change the downstream results.
officer_unit_year_sub_test <- officer_unit_year_sub %>%
  filter(nchar(unit) != 2)

# Leader fixed-effects model on the test sample.
m <- lm(repression_change ~ factor(leader_name),
        data = officer_unit_year_sub_test)
# NOTE(review): the column name `reid` is kept as in the original; it looks
# like a typo for `resid`, but renaming it would overwrite the year-model
# residuals already stored in `resid` - confirm intent before changing.
officer_unit_year_sub_test$reid <- m$residuals

# Run `rifle` (not defined in this part of the file; presumably provided
# by funs/rifle-fun.R - confirm) on the test sample.
outcome_vector_year <- rifle(
  officer_unit_year_sub_test,
  'unit', 'year', 'leader_name', 'repression_change',
  999
)

# First 999 elements of the result, as a one-column data frame `values`.
outcome_vector_plot <- as.data.frame(outcome_vector_year[1:999])
names(outcome_vector_plot)[1] <- 'values'

# Histogram of those values with a dashed red reference line.
# NOTE(review): 0.6810267 is hard-coded; presumably the observed
# R-squared - confirm it still matches the current data.
ggplot(outcome_vector_plot) +
  geom_histogram(aes(x = values)) +
  geom_vline(xintercept = 0.6810267, colour = 'red', linetype = 2) +
  labs(x = 'R-Squared', y = 'Count') +
  theme_bw()

# Writes the most recent ggplot to disk.
ggsave('fig-out/rifle.pdf')
